package com.rlax.framework.util;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.pdf.PdfConverter;
import org.apache.poi.xwpf.converter.pdf.PdfOptions;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.docx4j.Docx4J;
import org.docx4j.convert.in.Doc;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.w3c.dom.Document;

public class WordUtils {
	
	/**
     * 读取word文件的内容，并转为html
     * @param file word文件
     * @param imgFile 图片引用路径(全路径)
     * @param saveFile 图片保存路径
     */
	public static String doc2Html(File file, final String imgFile, String saveFile) throws TransformerException, IOException, ParserConfigurationException {
		final int ran = (int)(Math.random()*10000);//获取一个随机数，防止图片名字重复
		HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(file));
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        wordToHtmlConverter.setPicturesManager( new PicturesManager(){
        	public String savePicture( byte[] content, PictureType pictureType, String suggestedName,float widthInches, float heightInches ){
        		return imgFile+ran+suggestedName;
        	}
        });
        wordToHtmlConverter.processDocument(wordDocument);
        //保存图片
        List<Picture> pics=wordDocument.getPicturesTable().getAllPictures();
        if(pics!=null){
        	for(int i=0;i<pics.size();i++){
        		Picture pic = (Picture)pics.get(i);
        		try {
        			File f = new File(saveFile);
    				if (!f.exists()) {
    					f.mkdirs();
    				}
        			pic.writeImageContent(new FileOutputStream(saveFile + ran+pic.suggestFullFileName()));
        		} catch (FileNotFoundException e) {
        			e.printStackTrace();
        		}  
        	}
        }
        Document htmlDocument = wordToHtmlConverter.getDocument();
        DOMSource domSource = new DOMSource(htmlDocument);
        StringWriter writer = new StringWriter();
        StreamResult streamResult = new StreamResult(writer);
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer serializer = tf.newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(domSource, streamResult);
        writer.close();
        return new String(writer.toString());
	}
	
	/**
     * 获取html标签内代码
     * @param html html代码
     * @param name 标签名，如style、body
     */
	public String html(String html, String name) throws IOException{
		html = html.replace("\r\n","").replace("\r","").replace("\n","");
		Pattern pattern = Pattern.compile("\\<"+name+".*\\>.*\\<\\/"+name+"\\>"); 
		Matcher matcher = pattern.matcher(html);
		String htmlvalue = "";
		while(matcher.find())   
		{ 
		   int i=0;   
		   htmlvalue+=matcher.group(i); 
		   i++; 
		} 
		return htmlvalue;
	}
	
	/**
     * 输入流写入html,生成html文件
     * @param content 内容
     * @param path 文件路径
     */
    public static void writeFile(String content, String path) {
        FileOutputStream fos = null;
        BufferedWriter bw = null;
        try {
            File file = new File(path);
            fos = new FileOutputStream(file,true);
            bw = new BufferedWriter(new OutputStreamWriter(fos,"UTF-8"));
            bw.write(content);
        } catch (FileNotFoundException fnfe) {
            fnfe.printStackTrace();
        } catch (IOException ioe) {
            ioe.printStackTrace();
        } finally {
            try {
            	if (bw != null)
            		bw.close();
                if (fos != null)
                    fos.close();
            } catch (IOException ie) {
            	ie.printStackTrace();
            }
        }
    }
    
	public static void docx2html(File file, File outFile) {
		long startTime = System.currentTimeMillis();

		try {
			XWPFDocument document = new XWPFDocument(new FileInputStream(file));
			XHTMLOptions options = XHTMLOptions.create().indent(4);
	//		// Extract image
	//		File imageFolder = new File("D:/vfsroot/1000000/ueditor_upload/images"
	//				+ fileInName);
	//		options.setExtractor(new FileImageExtractor(imageFolder));
	//		// URI resolver
	//		options.URIResolver(new FileURIResolver(imageFolder));
			OutputStream out = new FileOutputStream(outFile);
			XHTMLConverter.getInstance().convert(document, out, options);
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
		
		System.out.println("Generate " + outFile.getName() + " with " + (System.currentTimeMillis() - startTime) + " ms.");
	}
    
	/**
	 * doc转pdf
	 */
	public static void doc2pdf(File file, File outFile) {
		FileInputStream inStream = null;
		FileOutputStream outStream = null;
		try {
			inStream = new FileInputStream(file);
			outStream = new FileOutputStream(outFile);
			WordprocessingMLPackage wordMLPackage = Doc.convert(inStream);
			Docx4J.toPDF(wordMLPackage, outStream);
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			try {
				inStream.close();
				outStream.close();
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
	}
	
	/**
	 * docx 转 pdf
	 * @param file
	 * @param outFile
	 */
	public static void docx2pdf(File file, File outFile) {
		FileInputStream inStream = null;
		FileOutputStream outStream = null;
		try {
			inStream = new FileInputStream(file);
			outStream = new FileOutputStream(outFile);
	        XWPFDocument document = new XWPFDocument(inStream);
	        PdfOptions options = PdfOptions.create();
	        PdfConverter.getInstance().convert(document, outStream, options);
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			try {
				inStream.close();
				outStream.close();
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
	}
	
    public static void main(String[] args) throws TransformerException, IOException, ParserConfigurationException {
		String str = doc2Html(new File("D:\\test\\包北合同_2014年9月物贸公司.doc"), null, null);
		writeFile(str, "D:\\test\\包北合同_2014年9月物贸公司doc.html");
		
		docx2html(new File("D:\\test\\包北合同_2014年9月物贸公司.docx"), new File("D:\\test\\包北合同_2014年9月物贸公司docx.html"));
		
		//doc2pdf(new File("D:\\test\\b.doc"), new File("D:\\test\\bdoc.pdf"));
		//docx2pdf(new File("D:\\test\\b.docx"), new File("D:\\test\\bdocx.pdf"));
	}
}
